Correcting the equations for triangular boundaries.
First, pull the GSD simulations code that includes calculations for type I error (\(\alpha\)) and power.
# Simulate the operating characteristics of a group sequential design (GSD).
#
# At each analysis the standardized test statistic is compared with a lower
# (futility) and an upper (efficacy) boundary.  Stopping probabilities at
# analysis i > 1 are joint probabilities over analyses 1..i, computed with
# mvtnorm::pmvnorm using the canonical GSD correlation
# corr(Z_i, Z_j) = sqrt(n_i / n_j) for i < j.
#
# Arguments:
#   n_analyses       number of planned analyses (must match bound lengths)
#   upper_bounds     efficacy (upper) Z-scale boundaries, one per analysis
#   lower_bounds     futility (lower) Z-scale boundaries, one per analysis
#   n_patients       cumulative sample size at each analysis
#   null_hypothesis  effect size under H0 (theta_0)
#   alt_hypothesis   effect size under H1 (delta)
#   variance         known outcome variance
#
# Returns a named list: one probability vector per analysis with elements
# "futility_null", "efficacy_null", "futility_alt", "efficacy_alt", plus
# "expected_sample_size", "alpha" (cumulative type I error), and "power".
gsd_simulations <- function(n_analyses = 3,
                            upper_bounds = c(2.5, 2, 1.5),
                            lower_bounds = c(0, 0.75, 1.5),
                            n_patients = c(20, 40, 60),
                            null_hypothesis = 0,
                            alt_hypothesis = 0.5,
                            variance = 1) {
  # sanity checks: design vectors must agree in length
  # (messages no longer say "Warning:" -- stop() signals an error)
  if (length(upper_bounds) != length(lower_bounds)) {
    stop("number of upper bounds must equal number of lower bounds")
  }
  if (length(n_patients) != length(upper_bounds)) {
    stop("number of patients vector must equal number of bounds")
  }
  if (n_analyses != length(upper_bounds)) {
    stop("number of analyses must equal number of bounds")
  }
  # assign values for null and alt hypotheses
  theta_0 <- null_hypothesis
  delta <- alt_hypothesis
  # noncentrality means at each analysis; preallocated instead of grown
  # with c() inside the loop
  mean_0 <- numeric(n_analyses)
  mean_1 <- numeric(n_analyses)
  # integration limits for pmvnorm: stopping at analysis i requires having
  # continued (stayed inside [lower, upper]) at analyses 1..(i - 1) and then
  # falling below lower_bounds[i] (futility) or rising above upper_bounds[i]
  # (efficacy) at analysis i
  futility_l_bounds <- vector("list", n_analyses)
  futility_u_bounds <- vector("list", n_analyses)
  efficacy_l_bounds <- vector("list", n_analyses)
  efficacy_u_bounds <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    if (i == 1) {
      # first analysis: univariate limits only
      futility_l_bounds[[i]] <- lower_bounds[i]
      futility_u_bounds[[i]] <- upper_bounds[i]
      efficacy_l_bounds[[i]] <- lower_bounds[i]
      efficacy_u_bounds[[i]] <- upper_bounds[i]
      next
    }
    # the original wrote `1:i-1`, which parses as (1:i) - 1 = 0:(i - 1);
    # the zero index is silently dropped so it happened to work, but
    # `1:(i - 1)` is what is meant
    futility_l_bounds[[i]] <- c(lower_bounds[1:(i - 1)], -Inf)
    futility_u_bounds[[i]] <- c(upper_bounds[1:(i - 1)], lower_bounds[i])
    efficacy_l_bounds[[i]] <- c(lower_bounds[1:(i - 1)], upper_bounds[i])
    efficacy_u_bounds[[i]] <- c(upper_bounds[1:(i - 1)], Inf)
  }
  # correlation matrices for the joint distribution of (Z_1, ..., Z_i)
  SIGMA_list <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    if (i == 1) next
    # start with identity; only off-diagonals need filling
    SIGMA <- diag(nrow = i)
    # the original reused `i` as the inner loop index, shadowing the outer
    # loop variable -- use distinct row/column indices instead
    for (r in seq_len(i)) {
      for (k in seq_len(i)) {
        if (r == k) next
        # the smaller cumulative sample size always goes in the numerator
        SIGMA[r, k] <- sqrt(n_patients[min(r, k)] / n_patients[max(r, k)])
      }
    }
    SIGMA_list[[i]] <- SIGMA
  }
  # list of per-analysis probability vectors to return
  probs_to_return <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    # noncentrality of the Z statistic at analysis i under H0 and H1
    mean_0[i] <- theta_0 * sqrt(n_patients[i] / (2 * variance))
    mean_1[i] <- delta * sqrt(n_patients[i] / (2 * variance))
    if (i == 1) {
      # first analysis: plain univariate normal probabilities
      # NOTE(review): sd = sqrt(variance) is kept from the original; for a
      # standardized Z statistic the sd is 1 regardless of `variance`
      futility_null <- pnorm(futility_l_bounds[[i]],
                             mean = mean_0[1], sd = sqrt(variance))
      efficacy_null <- pnorm(efficacy_u_bounds[[i]],
                             mean = mean_0[1], sd = sqrt(variance),
                             lower.tail = FALSE)
      futility_alt <- pnorm(futility_l_bounds[[i]],
                            mean = mean_1[1], sd = sqrt(variance))
      efficacy_alt <- pnorm(efficacy_u_bounds[[i]],
                            mean = mean_1[1], sd = sqrt(variance),
                            lower.tail = FALSE)
    } else {
      # later analyses: joint probabilities over analyses 1..i
      futility_null <- pmvnorm(lower = futility_l_bounds[[i]],
                               upper = futility_u_bounds[[i]],
                               mean = mean_0[1:i], corr = SIGMA_list[[i]])
      futility_alt <- pmvnorm(lower = futility_l_bounds[[i]],
                              upper = futility_u_bounds[[i]],
                              mean = mean_1[1:i], corr = SIGMA_list[[i]])
      efficacy_null <- pmvnorm(lower = efficacy_l_bounds[[i]],
                               upper = efficacy_u_bounds[[i]],
                               mean = mean_0[1:i], corr = SIGMA_list[[i]])
      efficacy_alt <- pmvnorm(lower = efficacy_l_bounds[[i]],
                              upper = efficacy_u_bounds[[i]],
                              mean = mean_1[1:i], corr = SIGMA_list[[i]])
    }
    probs_to_return[[i]] <- c(futility_null, efficacy_null,
                              futility_alt, efficacy_alt)
    names(probs_to_return[[i]]) <- c("futility_null", "efficacy_null",
                                     "futility_alt", "efficacy_alt")
  }
  # per-analysis stopping probability under the alternative (futility +
  # efficacy), used below for the expected sample size
  sum_probs <- numeric(n_analyses)
  # cumulative type I error (efficacy under H0) and power (efficacy under H1)
  alpha <- 0
  power <- 0
  for (i in seq_len(n_analyses)) {
    tmp_probs <- probs_to_return[[i]]
    # elements 3:4 are the probabilities under the alternative
    sum_probs[i] <- sum(tmp_probs[3:4])
    alpha <- tmp_probs[2] + alpha
    power <- tmp_probs[4] + power
  }
  # expected sample size under the alternative: sum_i n_i * P(stop at i)
  # NOTE(review): kept exactly as the original computed it; the final
  # analysis is treated like the interim ones, so the stopping
  # probabilities need not sum to one
  ess <- sum(n_patients * sum_probs)
  # append the summary quantities to the per-analysis list
  return_values <- append(probs_to_return, values = c(ess, alpha, power))
  names(return_values) <- c(paste0("analysis_", seq_len(n_analyses)),
                            "expected_sample_size", "alpha", "power")
  # return probabilities, ESS, alpha, and power
  return_values
}
There were two mistakes in the original boundary equations; the corrected implementation is below:
# Triangular (Whitehead) group sequential boundaries on the Z scale.
#
# Arguments:
#   n_analyses number of equally spaced analyses
#   alpha      one-sided type I error rate
#   delta      clinically relevant effect size under the alternative
#
# Returns a list with the efficacy (upper) and futility (lower) Z-scale
# boundaries at each analysis, and the information level at each analysis.
triangular_bounds <- function(n_analyses = 3,
                              alpha = 0.05,
                              delta = 0.5) {
  # 0.583 is the continuity-correction constant of the triangular test
  corr_const <- 0.583
  log_term <- log(1 / (2 * alpha))
  # maximum information level I_L
  I_L <- ((sqrt(4 * corr_const^2 / n_analyses + 8 * log_term) -
             2 * corr_const / sqrt(n_analyses))^2) * (1 / delta)^2
  # information fractions l/L and the information at each analysis
  analysis_fracs <- seq_len(n_analyses) / n_analyses
  I_L_fracs <- I_L * analysis_fracs
  # score-scale boundary pieces shared by both limits
  slope_term <- (2 / delta) * log_term
  shift_term <- corr_const * sqrt(I_L / n_analyses)
  # efficacy (upper) and futility (lower) limits, converted to Z scale
  e_l <- (slope_term - shift_term + (0.25 * delta) * analysis_fracs * I_L) /
    sqrt(I_L_fracs)
  f_l <- (-slope_term + shift_term + (0.75 * delta) * analysis_fracs * I_L) /
    sqrt(I_L_fracs)
  list(upper_bounds = e_l, lower_bounds = f_l, info = I_L_fracs)
}
Quick assessment of the characteristics of the calculated boundaries.
# assumed clinically relevant effect size under the alternative
delta <- 0.5
# compute triangular boundaries for a 3-analysis design at alpha = 0.05
bounds <- triangular_bounds(n_analyses = 3,
alpha = 0.05,
delta = delta)
# evaluate the design's operating characteristics (per-analysis stopping
# probabilities, expected sample size, alpha, power)
gsd_simulations(n_analyses = 3,
upper_bounds = bounds$upper_bounds,
lower_bounds = bounds$lower_bounds,
alt_hypothesis = delta,
n_patients = c(20,40,60),
variance = 1)
## $analysis_1
## futility_null efficacy_null futility_alt efficacy_alt
## 0.50000000 0.01702085 0.05692315 0.29513713
##
## $analysis_2
## futility_null efficacy_null futility_alt efficacy_alt
## 0.37463706 0.02267064 0.09404163 0.36072856
##
## $analysis_3
## futility_null efficacy_null futility_alt efficacy_alt
## 0.07417766 0.01149280 0.05659186 0.13657629
##
## $expected_sample_size
## [1] 36.8221
##
## $alpha
## [1] 0.05118428
##
## $power
## [1] 0.792442
Dr. Robertson’s code for triangular bounds.
# Reference implementation (Dr. Robertson) of the triangular boundaries,
# kept verbatim for comparison with triangular_bounds() above.
# type I error
alpha <- 0.05
# number of analyses
K <- 3
# delta, true assumed clinically relevant difference
delta <- 0.5
# maximum information; 0.583 is the continuity-correction constant of the
# triangular test
Imax = (sqrt(4*0.583^2/K + 8*log(1/(2*alpha))) - 2*0.583/sqrt(K))^2/delta^2
# Score statistic
# dk / ck are the efficacy (upper) / futility (lower) boundaries on the
# score scale at information fractions (1:K)/K
dk = (2/delta)*log(1/(2*alpha)) - 0.583*sqrt(Imax/K) + (delta/4)*((1:K)/K)*Imax
ck = -(2/delta)*log(1/(2*alpha)) + 0.583*sqrt(Imax/K) + (3*delta/4)*((1:K)/K)*Imax
# ak / bk are the same boundaries converted to the Z scale
ak = dk/sqrt(Imax*(1:K)/K)
bk = ck/sqrt(Imax*(1:K)/K)
# plot Z-scale boundaries in black and score-scale boundaries in red
ggplot() +
geom_line(mapping = aes(x = 1:K, y = bk)) +
geom_line(mapping = aes(x = 1:K, y = ak)) +
geom_line(mapping = aes(x = 1:K, y = ck), color="red") +
geom_line(mapping = aes(x = 1:K, y = dk), color="red")
According to the paper, the symmetric boundaries are calculated using the following formula:
\[ r_l = C_{WT}\left(\frac{l}{L}\right)^{\Omega−0.5} \]
Pocock (1977) and O’Brien and Fleming (1979) boundaries take \(\Omega = 0.5\) or \(\Omega = 0\), respectively. Further, a numerical search is used for any chosen \(\Omega\) to determine the value of \(C_{WT}\) that implies the correct type I error rate \(\alpha\). A further search is then used to ascertain the required sample size for the power constraint.
Boundaries given by \(\Omega = 0.5\):
\[ r_l = C_{WT}\left(\frac{l}{L}\right)^{0.5−0.5} = C_{WT} \]
Using our gsd_simulations function, we can perform this
numerical search.
To perform the search as coded below, we need two helper functions: one that rounds a number down to the nearest power of ten, and one that checks whether a number is whole.
# Round a positive number down to the nearest power of ten
# (e.g., 0.004 -> 0.001, 42 -> 10).
round_down10 <- function(x) {
  exponent <- floor(log10(x))
  10^exponent
}
# TRUE when x is a whole number to within tolerance `tol` (vectorized;
# follows the pattern shown in the ?integer help page).
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
  abs(x - round(x)) < tol
}
Now, we can use the below function to calculate the Pocock boundaries.
# Numerically search for the constant (Pocock) boundary value that attains
# the target overall type I error rate, evaluating each candidate boundary
# with gsd_simulations().
#
# Arguments:
#   n_analyses number of analyses
#   alpha      target overall type I error rate
#   beta       target type II error rate
#              NOTE(review): currently unused by the search -- confirm intent
#   epsilon    convergence tolerance; must be a power of ten because it is
#              compared against round_down10() output
#   n_patients cumulative sample size at each analysis
#
# Returns a list with the symmetric upper/lower boundaries found and the
# full gsd_simulations() output for those boundaries.
calculate_pocock <- function(n_analyses = 3,
alpha = 0.05,
beta = 0.1,
epsilon = 0.00001,
n_patients = c(20, 40, 60)) {
# epsilon must be a power of ten for the round_down10 comparison below
if(!is.wholenumber(log(epsilon, 10))) {
# NOTE(review): message is missing its closing parenthesis
stop("Epsilon must be at log base 10 intervals (e.g., 0.1, 0.01, 0.001.")
}
# hyperparameter tuned to get close to alpha needed
# (search step size; negative, so subtracting it moves the negative
# starting boundary toward zero, i.e., narrows the continuation region)
z_mod <- -epsilon * 100
# start from the Bonferroni-style critical value qnorm(alpha/n_analyses),
# repeated at every analysis (Pocock boundaries are constant)
initial_boundaries <- rep_len(qnorm(alpha/n_analyses), length.out = n_analyses)
# qnorm(alpha/n_analyses) is negative: upper = -initial, lower = initial
pk_upper_bounds <- -initial_boundaries
pk_lower_bounds <- initial_boundaries
sim_vals <- gsd_simulations(n_analyses = n_analyses,
upper_bounds = pk_upper_bounds,
lower_bounds = pk_lower_bounds,
n_patients = n_patients)
# iterate until the attained alpha is within an order of magnitude
# (epsilon) of the target
# NOTE(review): the step direction depends only on the magnitude of the
# deviation, not its sign; this assumes the search approaches the target
# monotonically from one side -- confirm it cannot oscillate
while( round_down10(abs(sim_vals$alpha - alpha)) != epsilon){
if ( round_down10(abs(sim_vals$alpha - alpha)) > epsilon) {
# still far from target: shrink the boundary magnitude (z_mod < 0)
initial_boundaries <- initial_boundaries - z_mod
pk_upper_bounds <- -initial_boundaries
pk_lower_bounds <- initial_boundaries
} else if ( round_down10(abs(sim_vals$alpha - alpha)) < epsilon) {
# overshot: widen the boundary magnitude again
initial_boundaries <- initial_boundaries + z_mod
pk_upper_bounds <- -initial_boundaries
pk_lower_bounds <- initial_boundaries
}
# re-evaluate the design at the updated boundaries
sim_vals <- gsd_simulations(n_analyses = n_analyses,
upper_bounds = pk_upper_bounds,
lower_bounds = pk_lower_bounds,
n_patients = n_patients)
}
return( list(upper_bounds = pk_upper_bounds,
lower_bounds = pk_lower_bounds,
simulation = sim_vals) )
}
calculate_pocock()
## $upper_bounds
## [1] 1.993045 1.993045 1.993045
##
## $lower_bounds
## [1] -1.993045 -1.993045 -1.993045
##
## $simulation
## $simulation$analysis_1
## futility_null efficacy_null futility_alt efficacy_alt
## 0.0231282470 0.0231282470 0.0001756609 0.3402040133
##
## $simulation$analysis_2
## futility_null efficacy_null futility_alt efficacy_alt
## 1.546020e-02 1.546020e-02 8.511684e-06 2.904928e-01
##
## $simulation$analysis_3
## futility_null efficacy_null futility_alt efficacy_alt
## 1.131565e-02 1.131502e-02 6.392175e-07 1.780095e-01
##
## $simulation$expected_sample_size
## [1] 29.10825
##
## $simulation$alpha
## [1] 0.04990346
##
## $simulation$power
## [1] 0.8087063
We can compare these boundaries with a published R package:
rpact.
library(rpact)
## Installation qualification for rpact 4.2.1 has not yet been performed.
## Please run testPackage() before using the package in GxP relevant environments.
getDesignGroupSequential(
sided = 1,
alpha = 0.05,
informationRates = c(0.33, 0.67, 1),
typeOfDesign = "P"
)
Design parameters and output of group sequential design
User defined parameters
Derived from user defined parameters
Default parameters
Output
Boundaries given by \(\Omega = 0\):
\[ r_l = C_{WT}\left(\frac{l}{L}\right)^{0−0.5} = C_{WT}\left(\frac{l}{L}\right)^{−0.5} \]
# Numerically search for O'Brien-Fleming boundaries: the boundary shape is
# C * (l/L)^(-0.5) (Wang-Tsiatis with Omega = 0), and the scale factor C is
# searched so that the attained type I error matches `alpha`.
# Each candidate is evaluated with gsd_simulations().
#
# Arguments:
#   n_analyses number of analyses
#   alpha      target overall type I error rate
#   beta       target type II error rate
#              NOTE(review): currently unused by the search -- confirm intent
#   epsilon    convergence tolerance; must be a power of ten
#   n_patients cumulative sample size at each analysis
#
# Returns a list with the upper/lower boundaries found and the full
# gsd_simulations() output for those boundaries.
calculate_of <- function(n_analyses = 3,
alpha = 0.05,
beta = 0.1,
epsilon = 0.00001,
n_patients = c(20, 40, 60)) {
# epsilon must be a power of ten for the round_down10 comparison below
if(!is.wholenumber(log(epsilon, 10))) {
# NOTE(review): message is missing its closing parenthesis
stop("Epsilon must be at log base 10 intervals (e.g., 0.1, 0.01, 0.001.")
}
# hyperparameter tuned to get close to alpha needed
# (search step size for the scale factor; negative by construction)
z_mod <- -epsilon * 100
# scale factor C starts at the Bonferroni-style critical value; the
# (l/L)^-0.5 factor gives the O'Brien-Fleming shape across analyses
bounds_start <- rep_len(qnorm(alpha/n_analyses), length.out = n_analyses)
initial_boundaries <- bounds_start * (1:n_analyses / n_analyses)^-0.5
of_upper_bounds <- -initial_boundaries
of_lower_bounds <- initial_boundaries
sim_vals <- gsd_simulations(n_analyses = n_analyses,
upper_bounds = of_upper_bounds,
lower_bounds = of_lower_bounds,
n_patients = n_patients)
# iterate until the attained alpha is within an order of magnitude
# (epsilon) of the target; same magnitude-only step rule as
# calculate_pocock -- NOTE(review): confirm it cannot oscillate
while( round_down10(abs(sim_vals$alpha - alpha)) != epsilon){
if ( round_down10(abs(sim_vals$alpha - alpha)) > epsilon) {
# still far from target: shrink the scale factor magnitude (z_mod < 0)
bounds_start <- bounds_start - z_mod
initial_boundaries <- bounds_start * (1:n_analyses / n_analyses)^-0.5
of_upper_bounds <- -initial_boundaries
of_lower_bounds <- initial_boundaries
} else if ( round_down10(abs(sim_vals$alpha - alpha)) < epsilon) {
# overshot: widen the scale factor magnitude again
bounds_start <- bounds_start + z_mod
initial_boundaries <- bounds_start * (1:n_analyses / n_analyses)^-0.5
of_upper_bounds <- -initial_boundaries
of_lower_bounds <- initial_boundaries
}
# re-evaluate the design at the updated boundaries
sim_vals <- gsd_simulations(n_analyses = n_analyses,
upper_bounds = of_upper_bounds,
lower_bounds = of_lower_bounds,
n_patients = n_patients)
}
return( list(upper_bounds = of_upper_bounds,
lower_bounds = of_lower_bounds,
simulation = sim_vals) )
}
calculate_of()
## $upper_bounds
## [1] 2.961885 2.094369 1.710045
##
## $lower_bounds
## [1] -2.961885 -2.094369 -1.710045
##
## $simulation
## $simulation$analysis_1
## futility_null efficacy_null futility_alt efficacy_alt
## 1.528809e-03 1.528809e-03 2.772646e-06 8.367847e-02
##
## $simulation$analysis_2
## futility_null efficacy_null futility_alt efficacy_alt
## 1.718510e-02 1.718510e-02 7.196005e-06 4.749465e-01
##
## $simulation$analysis_3
## futility_null efficacy_null futility_alt efficacy_alt
## 3.124598e-02 3.123870e-02 3.490707e-06 2.964627e-01
##
## $simulation$expected_sample_size
## [1] 38.45974
##
## $simulation$alpha
## [1] 0.0499526
##
## $simulation$power
## [1] 0.8550876
We can compare these boundaries with a published R package:
rpact.
getDesignGroupSequential(
sided = 1,
alpha = 0.05,
informationRates = c(0.33, 0.67, 1),
typeOfDesign = "OF"
)
Design parameters and output of group sequential design
User defined parameters
Derived from user defined parameters
Default parameters
Output
Prepare a function that will calculate the expected sample size.
# Compute the expected sample size of a design over a grid of 101 true
# effect sizes delta in [0, 1], via gsd_simulations().
#
# Arguments:
#   upper_bounds efficacy boundaries, passed through to gsd_simulations()
#   lower_bounds futility boundaries, passed through to gsd_simulations()
#
# Returns a numeric vector of expected sample sizes, one per delta value.
#
# NOTE: the original defaults (`upper_bounds = upper_bounds`) were
# self-referential and raised "promise already under evaluation" when the
# function was called without arguments; the arguments are now simply
# required, which all existing callers already satisfy.
simulate_ess <- function(upper_bounds,
                         lower_bounds) {
  delta_vector <- seq(from = 0, to = 1, length.out = 101)
  # preallocate one slot per delta value
  ess_vector <- vector(mode = "numeric", length = length(delta_vector))
  for (i in seq_along(delta_vector)) {
    ess_vector[i] <- gsd_simulations(
      upper_bounds = upper_bounds,
      lower_bounds = lower_bounds,
      alt_hypothesis = delta_vector[i]
    )$expected_sample_size
  }
  ess_vector
}
Calculate the boundaries.
pocock_bounds <- calculate_pocock()
of_bounds <- calculate_of()
tri_bounds <- triangular_bounds()
Graph the bounds.
ggplot() +
geom_line(mapping = aes(x = 1:3, y = pocock_bounds$upper_bounds, color = "Pocock"),
linetype = 2) +
geom_line(mapping = aes(x = 1:3, y = pocock_bounds$lower_bounds, color = "Pocock"),
linetype = 2) +
geom_line(mapping = aes(x = 1:3, y = of_bounds$upper_bounds, color = "O'Brien-Fleming")) +
geom_line(mapping = aes(x = 1:3, y = of_bounds$lower_bounds, color = "O'Brien-Fleming")) +
geom_line(mapping = aes(x = 1:3, y = tri_bounds$upper_bounds, color = "Triangular")) +
geom_line(mapping = aes(x = 1:3, y = tri_bounds$lower_bounds, color = "Triangular")) +
scale_color_manual(name = "Boundary types",
breaks = c("Pocock", "O'Brien-Fleming", "Triangular"),
values = c("Pocock" = "red",
"O'Brien-Fleming" = "blue",
"Triangular" = "purple")) +
labs(x = "Analysis Number", y = "Z score",
title = "Boundary values for different equations")
Calculate the expected sample size vectors.
pocock_ess <- simulate_ess(upper_bounds = pocock_bounds$upper_bounds,
lower_bounds = pocock_bounds$lower_bounds)
of_ess <- simulate_ess(upper_bounds = of_bounds$upper_bounds,
lower_bounds = of_bounds$lower_bounds)
tri_ess <- simulate_ess(upper_bounds = tri_bounds$upper_bounds,
lower_bounds = tri_bounds$lower_bounds)
delta_vector <- seq(from = 0, to = 1, length.out = 101)
ggplot() +
geom_line(mapping = aes(x = delta_vector, y = pocock_ess, color = "Pocock")) +
geom_line(mapping = aes(x = delta_vector, y = of_ess, color = "O'Brien-Fleming")) +
geom_line(mapping = aes(x = delta_vector, y = tri_ess, color = "Triangular")) +
scale_color_manual(name = "Boundary types",
breaks = c("Pocock", "O'Brien-Fleming", "Triangular"),
values = c("Pocock" = "red",
"O'Brien-Fleming" = "blue",
"Triangular" = "purple")) +
labs(x = "True \u03B4", y = "Expected sample size",
title = "Expected sample size over range of true \u03B4 values")
The maximum expected sample size is:
max(pocock_ess)
## [1] 30.39239
max(of_ess)
## [1] 39.49365
max(tri_ess)
## [1] 39.22077
Expected sample size under the null:
pocock_ess[delta_vector==0]
## [1] 3.520207
of_ess[delta_vector==0]
## [1] 5.184612
tri_ess[delta_vector==0]
## [1] 31.37291
Assuming the alternative is \(\delta = 0.5\), we can calculate the sample size under the alternative as well:
pocock_ess[delta_vector==0.5]
## [1] 29.10829
of_ess[delta_vector==0.5]
## [1] 38.45936
tri_ess[delta_vector==0.5]
## [1] 36.82225
# load required libraries
library(gplite)
## This is gplite version 0.13.0
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
gp_optim
Generate data for the function to which the model will be fit.
# number of training points
n <- 5
# inputs: n equally spaced points on [-2, 2], as a one-column matrix
x <- matrix(seq(-2, 2, length.out = n), ncol = 1)
# noiseless quadratic targets
y <- x^2
Plot the data.
ggplot() +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Initialize the GP model.
# Specify the GP model we want to use:
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 0.3,
magn = 1,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.5,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
Optimize the model using the internal function.
# Now fit the model to the data:
gp_optimized <- gp_optim(gp_empty, x, y, verbose = FALSE)
Plot the model.
# compute the predictive mean and variance in a grid of points
xt <- seq(-4, 4, len=150)
pred <- gp_pred(gp_optimized, xt, var = T)
# visualize
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
geom_line(aes(x=xt, y=mu), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Pull the energy.
gp_energy(gp_optimized)
## [1] 11.887
Increase the number of restarts.
# Now fit the model to the data:
gp_optimized_10 <- gp_optim(
gp = gp_empty,
x = x,
y = y,
restarts = 10,
verbose = FALSE
)
Plot the model with larger number of restarts.
# compute the predictive mean and variance in a grid of points
pred <- gp_pred(gp_optimized_10, xt, var = T)
# visualize
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
geom_line(aes(x=xt, y=mu), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Pull the energy.
gp_energy(gp_optimized_10)
## [1] 11.887
Increase to 1000 restarts.
# Now fit the model to the data:
gp_optimized_1000 <- gp_optim(
gp = gp_empty,
x = x,
y = y,
restarts = 1000,
verbose = FALSE
)
Plot the model with larger number of restarts.
# compute the predictive mean and variance in a grid of points
pred <- gp_pred(gp_optimized_1000, xt, var = T)
# visualize
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
geom_line(aes(x=xt, y=mu), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Pull the energy.
gp_energy(gp_optimized_1000)
## [1] 11.887
Increase to 1,000,000 restarts.
# Now fit the model to the data:
gp_optimized_1e6 <- gp_optim(
gp = gp_empty,
x = x,
y = y,
restarts = 1e6,
verbose = FALSE
)
Plot the model with larger number of restarts.
# compute the predictive mean and variance in a grid of points
pred <- gp_pred(gp_optimized_1e6, xt, var = T)
# visualize
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
geom_line(aes(x=xt, y=mu), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Pull the energy.
gp_energy(gp_optimized_1e6)
## [1] 11.887
# Grid of kernel hyperparameters to evaluate:
#   ell     length-scale of the squared-exponential kernel
#   sigma_f signal magnitude (magn argument of cf_sexp)
ell <- seq(0.5, 3, by = 0.05)
sigma_f <- seq(2, 8, by = 0.05)
hyperparams <- expand.grid(
  ell = ell,
  sigma_f = sigma_f
)
# preallocate the energy vector instead of growing it with c() on every
# iteration (matches the preallocated version of this loop used later)
energy <- vector(mode = "numeric", length = nrow(hyperparams))
# fit one GP per hyperparameter combination and record its energy
# (negative log marginal likelihood); the noise sd is held fixed at 0.01
for (i in seq_len(nrow(hyperparams))) {
  gp_empty <- gp_init(
    # A squared exponential (aka Gaussian aka RBF) kernel
    cfs = cf_sexp(
      vars = NULL,
      lscale = hyperparams$ell[i],
      magn = hyperparams$sigma_f[i],
      prior_lscale = prior_logunif(),
      prior_magn = prior_logunif(),
      normalize = FALSE
    ),
    # Assume Gaussian distributed errors
    lik = lik_gaussian(
      sigma = 0.01,
      prior_sigma = prior_logunif()
    ),
    # Use the full covariance (i.e., do not approximate)
    method = method_full()
  )
  gp_raw <- gp_fit(gp_empty, x, y)
  energy[i] <- gp_energy(gp_raw)
}
# attach energies to the grid and report the minimum-energy combination
surface_plot <- cbind(hyperparams, energy)
surface_plot[which.min(surface_plot$energy), ]
## ell sigma_f energy
## 4212 1.95 6.1 11.05812
Model with \(\ell = 2\) and \(\sigma_f^2 = 6\), near the optimal values. Recall that we are setting \(\sigma_n^2 = 0.01\).
# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 2,
magn = 6,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw_ell2sigma6 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
pred_ell2sigma6 <- gp_pred(gp_raw_ell2sigma6, xt, var = T)
# visualize
mu_ell2sigma6 <- pred_ell2sigma6$mean
lb_ell2sigma6 <- pred_ell2sigma6$mean - 2*sqrt(pred_ell2sigma6$var)
ub_ell2sigma6 <- pred_ell2sigma6$mean + 2*sqrt(pred_ell2sigma6$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray') +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Plot the entire surface.
fig <- plot_ly() %>%
add_trace(
x = unique(surface_plot$sigma_f),
y = unique(surface_plot$ell),
z = matrix(surface_plot$energy, nrow = 51, ncol = 121),
type = "surface") %>%
add_trace(
x = 6.1,
y = 1.95,
z = 11.05812,
type = "scatter3d",
mode = "markers"
) %>%
layout(
scene = list(
aspectmode = 'manual', # Set to manual for custom aspect ratio
aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
xaxis = list(
title = "signal variance"
),
yaxis = list(
title = "length-scale parameter"
),
zaxis = list(
title = "-log(marginal likelihood)"
)
)
)
fig
Plot the surface nearest the optimum value.
fig <- plot_ly() %>%
add_trace(
x = unique(surface_plot$sigma_f)[80:121],
y = unique(surface_plot$ell)[10:51],
z = matrix(surface_plot$energy, nrow = 51, ncol = 121)[10:51, 80:121],
type = "surface") %>%
add_trace(
x = 6.1,
y = 1.95,
z = 11.05812,
type = "scatter3d",
mode = "markers"
) %>%
layout(
scene = list(
aspectmode = 'manual', # Set to manual for custom aspect ratio
aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
xaxis = list(
title = "signal variance"
),
yaxis = list(
title = "length-scale parameter"
),
zaxis = list(
title = "-log(marginal likelihood)"
)
)
)
fig
Next, increase the number of points that we are fitting, but within the originally specified range of the function \(x \in (-2, 2)\). Below, I have added 3 new points.
# n is 7 now; used to be 5
n_new <- 7
x_new <- matrix(seq(-2, 2, length.out = n_new), ncol = 1)
y_new <- x_new^2
Plot the data.
ggplot() +
geom_point(aes(x = x_new, y = y_new), size=2) +
xlab('x') + ylab('y')
Now, fit the model using gp_optim.
gp_empty_7 <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 0.3,
magn = 1,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.5,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
Optimize.
gp_optimized_7 <- gp_optim(gp = gp_empty_7,
x = x_new,
y = y_new,
restarts = 5,
verbose = TRUE)
## Optimizing parameters
## p1: log cf_sexp.lscale
## p2: log cf_sexp.magn
## p3: log lik_gaussian.sigma
##
## p1 p2 p3 Energy Iteration
## -1.20 0.00 -0.69 21.92 0
## -1.08 0.00 -0.69 21.45 1
## -1.20 0.12 -0.69 19.95 2
## -1.20 0.00 -0.57 21.38 3
## -1.12 0.08 -0.61 20.04 4
## -1.14 0.06 -0.63 20.47 5
## -1.27 0.13 -0.56 19.57 6
## -1.36 0.20 -0.49 18.77 7
## -1.26 0.27 -0.63 18.15 8
## -1.28 0.40 -0.65 17.09 9
## -1.44 0.40 -0.61 17.24 10
## -1.36 0.32 -0.61 17.81 11
## -1.53 0.55 -0.48 16.39 12
## -1.69 0.76 -0.37 15.93 13
## -1.58 0.84 -0.60 15.91 14
## -1.69 1.16 -0.65 16.51 15
## -1.59 0.94 -0.47 15.99 16
## -1.55 0.80 -0.51 15.90 17
## -1.93 1.20 -0.33 16.68 18
## -1.77 1.00 -0.41 16.11 19
## -1.44 0.60 -0.57 16.20 20
## -1.69 0.90 -0.45 15.96 21
## -1.53 0.70 -0.53 15.98 22
## -1.65 0.85 -0.47 15.92 23
## -1.50 0.90 -0.68 15.92 24
## -1.55 0.87 -0.60 15.91 25
## -1.47 0.82 -0.67 15.88 26
## -1.38 0.81 -0.76 15.84 27
## -1.46 0.77 -0.64 15.90 28
## -1.48 0.79 -0.63 15.89 29
## -1.37 0.76 -0.67 15.85 30
## -1.42 0.78 -0.65 15.87 31
## -1.27 0.77 -0.87 15.77 32
## -1.13 0.75 -1.05 15.61 33
## -1.11 0.75 -1.02 15.57 34
## -0.92 0.74 -1.22 15.23 35
## -0.92 0.77 -1.36 15.23 36
## -0.70 0.78 -1.70 14.68 37
## -0.45 0.71 -1.88 13.94 38
## 0.02 0.65 -2.44 12.98 39
## 0.07 0.69 -2.52 12.67 40
## 0.67 0.66 -3.26 23.11 41
## 0.51 0.68 -3.22 16.47 42
## -0.56 0.72 -1.72 14.28 43
## 0.38 0.60 -2.76 15.45 44
## -0.43 0.73 -1.96 13.84 45
## 0.33 0.66 -2.90 13.68 46
## 0.11 0.68 -2.60 12.75 47
## 0.56 0.62 -3.08 19.80 48
## -0.18 0.70 -2.24 13.12 49
## 0.31 0.65 -2.80 13.75 50
## -0.06 0.69 -2.38 12.90 51
## 0.06 0.72 -2.56 12.53 52
## 0.08 0.75 -2.62 12.32 53
## 0.23 0.73 -2.78 12.41 54
## 0.16 0.72 -2.68 12.44 55
## 0.14 0.77 -2.68 12.10 56
## 0.16 0.81 -2.72 11.80 57
## 0.25 0.84 -2.89 11.58 58
## 0.34 0.91 -3.07 11.12 59
## 0.16 0.92 -2.83 11.37 60
## 0.18 0.87 -2.81 11.50 61
## 0.36 1.01 -3.12 10.46 62
## 0.51 1.14 -3.37 9.80 63
## 0.51 1.17 -3.47 9.61 64
## 0.68 1.34 -3.84 9.01 65
## 0.86 1.34 -4.03 11.88 66
## 0.34 1.03 -3.13 10.42 67
## 0.68 1.43 -3.82 8.37 68
## 0.84 1.69 -4.19 7.38 69
## 1.02 1.75 -4.48 8.62 70
## 0.85 1.57 -4.14 8.35 71
## 1.08 1.93 -4.74 7.29 72
## 1.36 2.33 -5.43 7.25 73
## 1.35 2.39 -5.33 6.40 74
## 1.68 2.91 -6.08 6.36 75
## 1.74 3.05 -6.33 5.62 76
## 2.19 3.78 -7.42 6.62 77
## 2.34 3.83 -7.70 13.26 78
## 1.22 2.23 -5.07 6.18 79
## 1.73 3.12 -6.22 4.56 80
## 1.92 3.52 -6.62 3.52 81
## 1.57 2.95 -5.93 3.85 82
## 1.60 2.94 -5.97 4.27 83
## 2.27 4.12 -7.52 3.61 84
## 2.00 3.65 -6.90 3.66 85
## 2.09 4.02 -7.05 1.80 86
## 2.27 4.50 -7.41 0.82 87
## 2.73 5.14 -8.43 1.79 88
## 2.44 4.59 -7.80 2.05 89
## 2.35 4.65 -7.45 0.77 90
## 2.39 4.92 -7.42 0.36 91
## 3.02 6.19 -8.89 -0.19 92
## 3.57 7.52 -10.02 -0.22 93
## 2.75 6.16 -8.14 0.75 94
## 2.75 5.90 -8.21 0.18 95
## 3.53 7.73 -9.70 0.67 96
## 3.22 6.92 -9.12 0.20 97
## 3.96 8.65 -10.81 1.04 98
## 2.78 5.85 -8.27 0.02 99
## 2.85 5.93 -8.54 -0.06 100
## 2.94 6.18 -8.69 -0.05 101
## 3.38 6.97 -9.68 -0.55 102
## 3.70 7.50 -10.41 -1.06 103
## 3.96 8.12 -11.05 -0.71 104
## 3.67 7.55 -10.36 -0.74 105
## 4.44 9.12 -11.98 0.71 106
## 3.25 6.73 -9.40 -0.34 107
## 3.51 6.99 -10.10 -1.11 108
## 3.49 6.73 -10.13 -1.24 109
## 3.99 7.79 -11.20 -1.46 110
## 4.36 8.33 -12.09 -0.99 111
## 3.79 7.13 -10.81 -1.62 112
## 3.85 6.92 -11.03 -0.68 113
## 3.81 6.94 -11.01 -1.11 114
## 3.78 7.08 -10.86 -1.57 115
## 4.22 7.94 -11.78 -1.15 116
## 3.67 7.03 -10.54 -1.59 117
## 3.50 6.37 -10.28 -0.76 118
## 3.87 7.44 -10.97 -1.63 119
## 3.77 7.32 -10.68 -1.59 120
## 3.77 7.26 -10.73 -1.64 121
## 3.95 7.52 -11.13 -1.59 122
## 3.88 7.40 -10.98 -1.64 123
## 3.89 7.60 -10.98 -1.54 124
## 3.81 7.25 -10.85 -1.65 125
## 3.77 7.17 -10.74 -1.65 126
## 3.80 7.24 -10.80 -1.66 127
## 3.71 7.10 -10.60 -1.62 128
## 3.84 7.32 -10.89 -1.65 129
## 3.86 7.28 -10.96 -1.61 130
## 3.79 7.27 -10.79 -1.65 131
## 3.81 7.30 -10.80 -1.65 132
## 3.81 7.26 -10.84 -1.66 133
## 3.77 7.19 -10.73 -1.65 134
## 3.82 7.29 -10.85 -1.66 135
## Assessing convergence...
## 3.90 7.24 -10.80 -1.42 136
## 3.70 7.24 -10.80 -1.50 137
## 3.80 7.34 -10.80 -1.63 138
## 3.80 7.14 -10.80 -1.60 139
## 3.80 7.24 -10.70 -1.66 140
## 3.80 7.24 -10.90 -1.66 141
## Optimization converged.
Optimal value of \(\sigma_f^2 = 7.24\) and optimal value of \(\ell=3.8\).
Pull the energy.
gp_energy(gp_optimized_7)
## [1] -1.65625
Plot the optimal model.
xt_more <- seq(-10, 10, len=150)
pred_7 <- gp_pred(gp = gp_optimized_7,
xnew = xt_more,
var = TRUE)
mu_pred_7 <- pred_7$mean
lb_pred_7 <- pred_7$mean - 2*sqrt(pred_7$var)
ub_pred_7 <- pred_7$mean + 2*sqrt(pred_7$var)
ggplot() +
geom_ribbon(aes(x=xt_more, ymin=lb_pred_7, ymax=ub_pred_7), fill='lightgray') +
geom_line(aes(x=xt_more, y=mu_pred_7), linewidth = 0.5) +
geom_point(aes(x=x_new, y=y_new), size=2) +
xlab('x') + ylab('y')
Recall, optimal value of \(\sigma_f^2 = 7.24\) and optimal value of \(\ell=3.8\). We will set \(\sigma_n^2 = 0.01\).
# using prior optimization, set sequence
ell <- seq(1, 6, by = 0.05)
# using prior optimization, set sequence
sigma_f <- seq(4, 10, by = 0.05)
hyperparams <- expand.grid(
ell = ell,
sigma_f = sigma_f
)
# collect energies in the empty vector
energy <- vector(mode = "numeric", length = dim(hyperparams)[1])
# run 6000+ models
for (i in 1:dim(hyperparams)[1]) {
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = hyperparams$ell[i],
magn = hyperparams$sigma_f[i],
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw <- gp_fit(gp_empty, x_new, y_new)
energy[i] <- gp_energy(gp_raw)
}
surface_plot <- cbind(hyperparams, energy)
surface_plot[which.min(surface_plot$energy),]
## ell sigma_f energy
## 12159 2.9 10 5.433649
Is the range of energies lower value?
range(surface_plot$energy)
## [1] 5.433649 97.868111
Plot the surface.
fig <- plot_ly() %>%
add_trace(
x = unique(surface_plot$sigma_f),
y = unique(surface_plot$ell),
z = matrix(surface_plot$energy, nrow = 101, ncol = 121),
type = "surface") %>%
add_trace(
x = 10,
y = 2.9,
z = 5.433,
type = "scatter3d",
mode = "markers"
) %>%
layout(
scene = list(
aspectmode = 'manual', # Set to manual for custom aspect ratio
aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
xaxis = list(
title = "signal variance"
),
yaxis = list(
title = "length-scale parameter"
),
zaxis = list(
title = "-log(marginal likelihood)"
)
)
)
fig
Plot the surface with lowest energies.
# Re-plot the surface restricted to the first 45 length-scale values,
# zooming in on the low-energy region; the optimum point is kept marked.
trimmed_scene <- list(
  aspectmode = 'manual', # Set to manual for custom aspect ratio
  aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
  xaxis = list(title = "signal variance"),
  yaxis = list(title = "length-scale parameter"),
  zaxis = list(title = "-log(marginal likelihood)")
)
fig <- plot_ly() %>%
  add_trace(
    x = unique(surface_plot$sigma_f),
    y = unique(surface_plot$ell)[1:45],
    z = matrix(surface_plot$energy, nrow = 101, ncol = 121)[1:45, ],
    type = "surface"
  ) %>%
  add_trace(
    x = 10, y = 2.9, z = 5.433,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  layout(scene = trimmed_scene)
fig
\(n=7\) works, but \(n=8\) or \(n=9\) does not.
# Bump the sample size up to n = 8
n_new <- 8
# Equally spaced design points on [-2, 2], stored as a one-column matrix
grid_points <- seq(-2, 2, length.out = n_new)
x_new <- matrix(grid_points, ncol = 1)
# Noise-free quadratic response
y_new <- x_new^2
Plot the data.
# Scatter plot of the n = 8 training points
ggplot() +
  geom_point(aes(x = x_new, y = y_new), size = 2) +
  labs(x = 'x', y = 'y')
Now, fit the model using gp_optim.
# GP prior for the n = 8 data: same kernel/likelihood setup as before,
# but with noise sd initialized at 0.5 (to be optimized, not fixed).
gp_empty_8 <- gp_init(
  # Squared-exponential (Gaussian / RBF) kernel; lscale and magn are
  # starting values for the optimizer, with flat log-uniform priors
  cfs = cf_sexp(
    vars = NULL,
    lscale = 0.3,
    magn = 1,
    prior_lscale = prior_logunif(),
    prior_magn = prior_logunif(),
    normalize = FALSE
  ),
  # Gaussian observation noise with a log-uniform prior on its sd
  lik = lik_gaussian(
    sigma = 0.5,
    prior_sigma = prior_logunif()
  ),
  # Exact (full-covariance) inference, no approximation
  method = method_full()
)
Optimize.
# Optimize the hyperparameters by maximizing the marginal likelihood.
# With n = 8 this fails: the covariance Cholesky factorization breaks down.
gp_optimized_8 <- gp_optim(
  gp = gp_empty_8,
  x = x_new,
  y = y_new,
  restarts = 5,
  verbose = FALSE
)
## Error in chol.default(B): the leading minor of order 4 is not positive
# Fix extraction garbling: this line fused two library() calls.
# Load mvtnorm (multivariate normal probabilities) and bench (timings).
library(mvtnorm)
library(bench)
pmvnorm is “smart” about one-dimensional computations: it simply passes the values to pnorm.
# A genuinely univariate call: pmvnorm detects this and delegates to
# pnorm (see the "univariate: using pnorm" attribute below).
cat("pmvnorm\n\n")
## pmvnorm
pmvnorm(lower = -Inf, upper = 2, mean = 0, sigma = 1)
## upper
## 0.9772499
## attr(,"error")
## [1] 0
## attr(,"msg")
## [1] "univariate: using pnorm"
cat("\n\n")
cat("pnorm\n\n")
## pnorm
# Direct pnorm call for comparison: identical probability.
pnorm(2)
## [1] 0.9772499
# A 2-d problem whose first coordinate is unbounded: same probability,
# but now computed by the multivariate routine ("Normal Completion").
pmvnorm(lower = c(-Inf, -Inf), upper = c(Inf, 2), mean = c(0,0), sigma = diag(2))
## [1] 0.9772499
## attr(,"error")
## [1] 2e-16
## attr(,"msg")
## [1] "Normal Completion"
# Timing: does tacking on an unbounded dimension cost anything?
bench::mark(
  # 2d
  pmvnorm(lower = rep(-Inf, 2), upper = c(0, Inf),
          mean = rep(0, 2), corr = diag(2)),
  # 3d
  pmvnorm(lower = rep(-Inf, 3), upper = c(0, Inf, Inf),
          mean = rep(0, 3), corr = diag(3)),
  check = FALSE
)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:> <bch:> <dbl> <bch:byt> <dbl>
## 1 pmvnorm(lower = c(-Inf, -Inf), upp… 55.5µs 66.8µs 11598. 2.49KB 10.5
## 2 pmvnorm(lower = c(-Inf, -Inf, -Inf… 53.8µs 63.7µs 12449. 2.49KB 8.32
# Same comparison, but with the deterministic Miwa algorithm for the
# 3-d case (the 2-d case keeps the default GenzBretz algorithm).
bench::mark(
  # 2d
  pmvnorm(lower = rep(-Inf, 2), upper = c(0, Inf),
          mean = rep(0, 2), corr = diag(2)),
  # 3d
  pmvnorm(lower = rep(-Inf, 3), upper = c(0, Inf, Inf),
          mean = rep(0, 3), corr = diag(3),
          algorithm = Miwa()),
  check = FALSE
)
## # A tibble: 2 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:> <bch:> <dbl> <bch:byt> <dbl>
## 1 pmvnorm(lower = c(-Inf, -Inf), upp… 54.5µs 63.6µs 12423. 2.49KB 10.5
## 2 pmvnorm(lower = c(-Inf, -Inf, -Inf… 42.7µs 58.3µs 11449. 4.96KB 8.36
The Miwa algorithm does collapse multidimensional integrals: when all but one coordinate is unbounded, the integral is reduced to a single dimension.
# Only the FIRST coordinate is bounded: Miwa reduces the 3-d integral
# to one dimension (see "dim reduced to 1" in the message attribute).
pmvnorm(lower = c(-3, -Inf, -Inf), upper = c(0, Inf, Inf),
mean = c(0,0,0), corr = diag(3),
algorithm = Miwa())
## [1] 0.4986501
## attr(,"error")
## [1] 0
## attr(,"msg")
## [1] "Normal Complettion (dim reduced to 1)"
# Same reduction when the bounded coordinate is the LAST one.
pmvnorm(lower = c(-Inf, -Inf, -4), upper = c(Inf, Inf, 0),
mean = c(0,0,0), corr = diag(3),
algorithm = Miwa())
## [1] 0.4999683
## attr(,"error")
## [1] 0
## attr(,"msg")
## [1] "Normal Complettion (dim reduced to 1)"
# ...and when it is the MIDDLE one.
pmvnorm(lower = c(-Inf, -2, -Inf), upper = c(Inf, 0, Inf),
mean = c(0,0,0), corr = diag(3),
algorithm = Miwa())
## [1] 0.4772499
## attr(,"error")
## [1] 0
## attr(,"msg")
## [1] "Normal Complettion (dim reduced to 1)"
We can assess higher-dimensional integrals.
# Benchmark GenzBretz (quasi-Monte-Carlo) vs Miwa across problem sizes.
# BUG FIX: the two 4-d calls below originally passed mean = c(0, 0, 0)
# against corr = diag(4); mvtnorm silently recycled the mean to length 4
# with a warning. The mean now has the correct length (same result,
# no warning).
bench::mark(
  # 10-d, all bounds finite
  pmvnorm(
    lower = c(-2, -0.3, 0, -0.6, 0.01, -1, -2, -3, -1, 0.2),
    upper = rep(1, length.out = 10),
    mean = rep(0, length.out = 10),
    sigma = diag(10),
    algorithm = GenzBretz()
  ),
  pmvnorm(
    lower = c(-2, -0.3, 0, -0.6, 0.01, -1, -2, -3, -1, 0.2),
    upper = rep(1, length.out = 10),
    mean = rep(0, length.out = 10),
    sigma = diag(10),
    algorithm = Miwa()
  ),
  # 3-d, only the first coordinate bounded
  pmvnorm(
    lower = c(-Inf, -Inf, -Inf),
    upper = c(0, Inf, Inf),
    mean = c(0, 0, 0),
    corr = diag(3),
    algorithm = GenzBretz()
  ),
  pmvnorm(
    lower = c(-Inf, -Inf, -Inf),
    upper = c(0, Inf, Inf),
    mean = c(0, 0, 0),
    corr = diag(3),
    algorithm = Miwa()
  ),
  # 4-d, only the first coordinate bounded (mean length corrected to 4)
  pmvnorm(
    lower = c(-Inf, -Inf, -Inf, -Inf),
    upper = c(0, Inf, Inf, Inf),
    mean = rep(0, length.out = 4),
    corr = diag(4),
    algorithm = GenzBretz()
  ),
  pmvnorm(
    lower = c(-Inf, -Inf, -Inf, -Inf),
    upper = c(0, Inf, Inf, Inf),
    mean = rep(0, length.out = 4),
    corr = diag(4),
    algorithm = Miwa()
  ),
  # 10-d with two unbounded coordinates
  pmvnorm(
    lower = c(-Inf, -0.3, 0, -0.6, 0.01, -Inf, -2, -3, -1, 0.2),
    upper = c(Inf, 1, 3, 2, 1, 0.4, Inf, 1, 2, 3),
    mean = rep(0, length.out = 10),
    sigma = diag(10),
    algorithm = GenzBretz()
  ),
  pmvnorm(
    lower = c(-Inf, -0.3, 0, -0.6, 0.01, -Inf, -2, -3, -1, 0.2),
    upper = c(Inf, 1, 3, 2, 1, 0.4, Inf, 1, 2, 3),
    mean = rep(0, length.out = 10),
    sigma = diag(10),
    algorithm = Miwa()
  ),
  check = FALSE
)
## # A tibble: 8 × 6
## expression min median `itr/sec` mem_alloc `gc/sec`
## <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
## 1 pmvnorm(lower = c(-2, -0.3, 0,… 13.65ms 15.28ms 64.6 15.63KB 0
## 2 pmvnorm(lower = c(-2, -0.3, 0,… 52.9ms 56.48ms 17.8 37.33KB 0
## 3 pmvnorm(lower = c(-Inf, -Inf, … 54.37µs 64.53µs 11454. 2.49KB 10.7
## 4 pmvnorm(lower = c(-Inf, -Inf, … 45.73µs 54.43µs 14057. 0B 8.38
## 5 pmvnorm(lower = c(-Inf, -Inf, … 239.11µs 350.98µs 2408. 74.28KB 6.20
## 6 pmvnorm(lower = c(-Inf, -Inf, … 186.39µs 280.43µs 3060. 67.24KB 6.19
## 7 pmvnorm(lower = c(-Inf, -0.3, … 7.14ms 7.76ms 128. 15.63KB 2.03
## 8 pmvnorm(lower = c(-Inf, -0.3, … 22.02ms 25.12ms 39.7 82.41KB 0